//----------- Data Merging & Cleaning ---------------//

drop _all
clear matrix
global DIR "C:\Replication package"
cd "${DIR}"

*** PREPARE MACRO DATA ***

import excel "${DIR}\Input data\INSEE_10_ECC-15_FIN-15A_SoldeBudgetaire.xlsx", sheet("stata") cellrange(A1:B24) firstrow clear
save "soldebudg.dta", replace

use "${DIR}\Input data\MPS-MacroData.dta", clear
rename date year
tsset year
merge 1:1 year using "soldebudg.dta"
drop if _merge==2
drop _merge
tsset year
tsfilter cf outputgap = gdp
gen og = outputgap/gdp*100
reg soldebudg og, vce(rob)
predict soldestruc, resid
label variable soldestruc soldestruc
gen ib = -(soldestruc - L.soldestruc)
foreach var in ir_10y unemp {
gen `var'_d = `var' - L.`var'
}
foreach var in stress stockr house oil exch {
gen `var'_d = ((`var'/L.`var')-1)
}
save "MPS-MacroData_temp.dta", replace

*** PREPARE SECTOR DATA ***

use "${DIR}\FIBEN data\FIBEN_sectoral_analysis.dta", clear
collapse (mean) avg_intK1=int_K avg_intK2=int_K_2 avg_intK3=int_K_3 avg_lev=levier ///
(median) med_intK1=int_K med_intK2=int_K_2 med_intK3=int_K_3 med_lev=levier, by(NAF2_new)
drop if NAF2_new=="MISS"
save "sectoral_analysis_temp.dta", replace

*** APPEND 2004-2008 and 2008-2019 ***

use "${DIR}\SRCV data\ind_men_2004_2008_agregats.dta", clear
drop if annee_SRCV==2008 
append using "${DIR}\SRCV data\ind_men_2008_2019_agregats.dta"
save "ind_men_all_agregats.dta", replace
//
use "${DIR}\SRCV data\autres_menages_2004_2008_def.dta", clear
drop if annee_SRCV==2008 
append using "${DIR}\SRCV data\autres_menages_2008_2019_def.dta"
save "autres_menages_all_def.dta", replace
//
use "${DIR}\SRCV data\autres_individus_2004_2008_def.dta", clear
drop if annee_SRCV==2008 
append using "${DIR}\SRCV data\autres_individus_2008_2019_def.dta"
save "autres_individus_all_def.dta", replace
//
use "${DIR}\SRCV data\autres_individus_act_2004_2008_def.dta", clear
drop if annee_SRCV==2008 
append using "${DIR}\SRCV data\autres_individus_act_2008_2019_def.dta"
save "autres_individus_act_all_def.dta", replace

*** MERGE DATA FOR 2020 ***

use "${DIR}\SRCV data\ind_men_2020_agregats.dta", clear
merge m:1 ident_men_ue annee_SRCV using "${DIR}\SRCV data\autres_men_2020_def.dta"
drop if _merge==2
drop _merge
merge 1:1 rb030 annee_SRCV using "${DIR}\SRCV data\autres_indiv_2020_def.dta"
drop if _merge==2
drop _merge
merge 1:1 rb030 annee_SRCV using "${DIR}\SRCV data\autres_indiv_act_2020_def.dta"
drop if _merge==2
drop _merge
save "ind_men_2020_allmerged.dta", replace

*** MERGE DATA FOR 2004-2019 ***

use "ind_men_all_agregats.dta", clear
merge m:1 idmenc men annee using "autres_menages_all_def.dta"
drop if _merge==2
drop _merge
//
merge 1:1 idindc ind annee using "autres_individus_all_def.dta"
drop if _merge==2
drop _merge
//
merge 1:1 idindc ind annee using "autres_individus_act_all_def.dta"
drop if _merge==2
drop _merge

*** APPEND 2004-2019 and 2020 ***

append using "ind_men_2020_allmerged.dta"

*** MERGE WITH MPS & MACRO DATA ***

merge m:1 year using "MPS-MacroData_temp.dta"
drop if _merge==2
drop _merge
//
egen id = group(ind)
order men ind id
sort id year
by id year: gen dup = cond(_N==1,0,_n)
drop if dup>1
drop dup

*** RECLASSIFY SECTORS ***

cap drop fe_sector_*
gen NAF2_temp = NAF2_REV2_1P
replace NAF2_temp = "A" if NAF2_REV2_2P==01 | NAF2_REV2_2P==02 | NAF2_REV2_2P==03
replace NAF2_temp = "B" if NAF2_REV2_2P==05 | NAF2_REV2_2P==06 | NAF2_REV2_2P==07 | NAF2_REV2_2P==08 ///
| NAF2_REV2_2P==09
replace NAF2_temp = "C" if NAF2_REV2_2P==10 | NAF2_REV2_2P==11 | NAF2_REV2_2P==12 | NAF2_REV2_2P==13 ///
| NAF2_REV2_2P==14 | NAF2_REV2_2P==15 | NAF2_REV2_2P==16 | NAF2_REV2_2P==17 | NAF2_REV2_2P==18 ///
| NAF2_REV2_2P==19 | NAF2_REV2_2P==20 | NAF2_REV2_2P==21 | NAF2_REV2_2P==22 | NAF2_REV2_2P==23 ///
| NAF2_REV2_2P==24 | NAF2_REV2_2P==25 | NAF2_REV2_2P==26 | NAF2_REV2_2P==27 | NAF2_REV2_2P==28 ///
| NAF2_REV2_2P==29 | NAF2_REV2_2P==30 | NAF2_REV2_2P==31 | NAF2_REV2_2P==32 | NAF2_REV2_2P==33
replace NAF2_temp = "D" if NAF2_REV2_2P==35
replace NAF2_temp = "E" if NAF2_REV2_2P==36 | NAF2_REV2_2P==37 | NAF2_REV2_2P==38 | NAF2_REV2_2P==39
replace NAF2_temp = "F" if NAF2_REV2_2P==41 | NAF2_REV2_2P==42 | NAF2_REV2_2P==43 
replace NAF2_temp = "G" if NAF2_REV2_2P==45 | NAF2_REV2_2P==46 | NAF2_REV2_2P==47 
replace NAF2_temp = "H" if NAF2_REV2_2P==49 | NAF2_REV2_2P==50 | NAF2_REV2_2P==51 | NAF2_REV2_2P==52 ///
| NAF2_REV2_2P==53 
replace NAF2_temp = "I" if NAF2_REV2_2P==55 | NAF2_REV2_2P==56
replace NAF2_temp = "J" if NAF2_REV2_2P==58 | NAF2_REV2_2P==59 | NAF2_REV2_2P==60 | NAF2_REV2_2P==61 /// 
| NAF2_REV2_2P==62 | NAF2_REV2_2P==63
replace NAF2_temp = "K" if NAF2_REV2_2P==64 | NAF2_REV2_2P==65 | NAF2_REV2_2P==66
replace NAF2_temp = "L" if NAF2_REV2_2P==68 
replace NAF2_temp = "M" if NAF2_REV2_2P==69 | NAF2_REV2_2P==70 | NAF2_REV2_2P==71 | NAF2_REV2_2P==72 ///
| NAF2_REV2_2P==73 | NAF2_REV2_2P==74 | NAF2_REV2_2P==75 
replace NAF2_temp = "N" if NAF2_REV2_2P==77 | NAF2_REV2_2P==78 | NAF2_REV2_2P==79 | NAF2_REV2_2P==80 ///
| NAF2_REV2_2P==81 | NAF2_REV2_2P==82
replace NAF2_temp = "O" if NAF2_REV2_2P==84
replace NAF2_temp = "P" if NAF2_REV2_2P==85
replace NAF2_temp = "Q" if NAF2_REV2_2P==86 | NAF2_REV2_2P==87 | NAF2_REV2_2P==88
replace NAF2_temp = "R" if NAF2_REV2_2P==90 | NAF2_REV2_2P==91 | NAF2_REV2_2P==92 | NAF2_REV2_2P==93
replace NAF2_temp = "S" if NAF2_REV2_2P==94 | NAF2_REV2_2P==95 | NAF2_REV2_2P==96 
replace NAF2_temp = "T" if NAF2_REV2_2P==97 | NAF2_REV2_2P==98
replace NAF2_temp = "U" if NAF2_REV2_2P==99
replace NAF2_temp = "MISS" if NAF2_temp==""
sort id year
gen NAF2_new = NAF2_temp
bysort id: replace NAF2_new = NAF2_temp[_n-1] if NAF2_temp=="MISS" & NAF2_temp[_n-1]==NAF2_temp[_n+1]
replace NAF2_new = NAF2_temp if NAF2_new==""
encode NAF2_new, gen(sector)
tab NAF2_new, gen(fe_sector_)
drop fe_sector_14
drop NAF2_temp

*** MERGE WITH SECTOR DATA ***

merge m:1 NAF2_new using "sectoral_analysis_temp.dta"
drop if _merge==2
drop _merge
//
xtset id year
compress

save "${DIR}\FINAL_DATABASE.dta", replace

*** Clean directory
cap erase "soldebudg.dta"
cap erase "MPS-MacroData_temp.dta"
cap erase "sectoral_analysis_temp.dta"
cap erase "ind_men_all_agregats.dta"
cap erase "autres_menages_all_def.dta"
cap erase "autres_individus_all_def.dta"
cap erase "autres_individus_act_all_def.dta"
cap erase "ind_men_2020_allmerged.dta"

